#' ---
#' title: "Agenda Seeding: Rainfall Data Prep"
#' date: "`r Sys.Date()`"
#' output: html_document
#' header-includes:
#'  - \usepackage{booktabs}
#'  - \usepackage{longtable}
#'  - \usepackage{array}
#'  - \usepackage{multirow}
#'  - \usepackage{wrapfig}
#'  - \usepackage{float}
#'  - \usepackage{colortbl}
#'  - \usepackage{pdflscape}
#'  - \usepackage{tabu}
#'  - \usepackage{threeparttable}
#'  - \usepackage{threeparttablex}
#'  - \usepackage[normalem]{ulem}
#'  - \usepackage{makecell}
#'  - \usepackage{dcolumn}
#'  - \usepackage{setspace}\doublespacing
#' ---



## ---- data_prep_spin_code, eval = FALSE, include = FALSE ----
# spin code to output Rmd / Rnw
# set output_format to "html_document" for html
# rmarkdown::render(input = here::here("code", "weather_precip_station_merge3.R"), output_format = "pdf_document", clean = TRUE)


## ---- load_packages, include = FALSE ----

library(zoo)
library(here)
library(dplyr)
library(stringr)

## ---- load_data, include = FALSE ----

# Import the raw station-level precipitation file for April 1968.
# Cells holding either sentinel string below are read in as NA.
weather <- read.csv(
    file       = here("data/weather/precip_1968_04_all_stations.txt"),
    na.strings = c("-99999", "99999"),
    header     = TRUE
)

# Report rows x columns of the raw import.
dim(weather)

## ---- scrub_data, include = FALSE ----

# Remove the divider row(s): any record whose DSET field contains "----".
# A logical mask is used rather than a negative grep() index, because
# x[-integer(0), ] selects zero rows when the pattern matches nothing.
weather <- weather[!grepl("----", weather$DSET), , drop = FALSE]

# Remove unnecessary columns: those whose names start with "F" ...
weather <- weather[, !grepl("^F", names(weather)), drop = FALSE]

# ... and those whose names contain "DAHR".
weather <- weather[, !grepl("DAHR", names(weather)), drop = FALSE]

str(weather)

# Remove records with COOPID "999999" due to the inability to merge on
# duplicated IDs. Records with a missing COOPID are dropped as well: they
# cannot be matched to a station, and comparing NA with != would otherwise
# insert all-NA rows into the result.
w2 <- weather[!is.na(weather$COOPID) & weather$COOPID != "999999", , drop = FALSE]

# Store the ID as character so it can be merged with the station list.
w2$COOPID <- as.character(w2$COOPID)


## ---- load_station_data, include = FALSE ----

## load weather station data


# Import the weather station list; cells holding either sentinel string
# below are read in as NA.
stations <- read.csv(
    file       = here("data/weather/complete_station_list2.csv"),
    na.strings = c("-99999", "99999"),
    header     = TRUE
)

# Tabulate how many characters each COOPID has before padding.
table(nchar(as.character(stations$COOPID)))

# Left-pad every COOPID with zeros to a width of six characters.
stations <- mutate(
    stations,
    COOPID = str_pad(as.character(COOPID), width = 6, side = "left", pad = "0")
)

# Earlier approach, superseded by str_pad above (kept for reference):
# ## identify which COOPIDs are only five digits
# five <- which(nchar(as.character(stations$COOPID)) == 5)
#
# ## add a zero before any COOPID that is only five characters
# stations$COOPID <- ifelse(
#     nchar(as.character(stations$COOPID)) == 5,
#     paste0("0", as.character(stations$COOPID)),
#     as.character(stations$COOPID)
# )

# Make sure the merge key is stored as character.
stations$COOPID <- as.character(stations$COOPID)

## ---- merge_precipitation_station_data, include = FALSE ----

# Join precipitation records to station metadata on COOPID. merge() keeps
# only IDs present in both tables by default.
w3 <- merge(w2, stations, by = "COOPID")

dim(w3)

# Normalise column names: lower case, with every "." replaced by "_"
# (so merge suffixes such as ".x" / ".y" become "_x" / "_y").
names(w3) <- str_replace_all(tolower(names(w3)), "\\.", "_")

# Keep only the first record for each coopid. `dupes` is retained as a
# diagnostic, but the rows are dropped with a logical mask: indexing with
# -dupes would remove EVERY row when there are no duplicates, because
# -integer(0) selects nothing.
dupes <- which(duplicated(w3$coopid))
length(dupes)
w3 <- w3[!duplicated(w3$coopid), , drop = FALSE]

# Positions of all columns whose name contains "day" (the daily rain cells).
daycols <- grep("day", names(w3))

## Convert rain cells to numeric by way of character, so that a factor yields
## the numbers shown in its labels rather than its internal integer codes.
## `x` may be a vector, a factor, or a list; lists are flattened first.
numericize <- function(x) {
    flattened <- unlist(x)
    as.numeric(as.character(flattened))
}

# Convert every day column to numeric, one column at a time.
w3[daycols] <- lapply(w3[daycols], numericize)

## Convert cells holding the missing-value sentinel (99999 or -99999) to NA.
## Returns a character vector the same length as `x`.
## The whole cell must equal the sentinel (ignoring surrounding whitespace);
## a substring match would also blank legitimate values that merely contain
## the digits "99999", e.g. 199999.
nine.to.na <- function(x) {
    cells <- as.character(x)
    cells[trimws(cells) %in% c("99999", "-99999")] <- NA
    cells
}

# For every day column: blank out sentinel cells (which yields character
# values), then convert the result back to numeric.
w3[daycols] <- lapply(
    w3[daycols],
    function(day_values) numericize(nine.to.na(day_values))
)


# Strip leading/trailing whitespace from both station-name columns
# (one from each side of the merge).
w3$station_name_x <- str_trim(as.character(w3$station_name_x))
w3$station_name_y <- str_trim(as.character(w3$station_name_y))

# Drop non-contiguous states (Alaska, Hawaii). `AKHI` is kept as a
# diagnostic; rows are removed with a logical mask because w3[-AKHI, ]
# would delete every row if no AK/HI station were present.
AKHI <- grep("AK|HI", w3$st)
length(AKHI)

w3 <- w3[!grepl("AK|HI", w3$st), , drop = FALSE]

# Drop rows whose country is something other than "UNITED STATES" (the
# original note describes these as summary rows -- TODO confirm). Rows with
# a missing country are kept, as before. A logical mask avoids the
# -which(...) pitfall of removing every row when nothing matches.
w3 <- w3[is.na(w3$country) | w3$country == "UNITED STATES", , drop = FALSE]


## ---- calc_precipitation_station_data_for_periods, include = FALSE ----

# All four totals select their day columns by name ("day01" ... "day30"), so
# they do not depend on the position of columns within `daycols`.
# NOTE(review): na.rm = TRUE makes a station with no valid readings in a
# window sum to 0, the same as a dry station -- confirm this is intended.

# Three days prior to MLK assassination, days Apr 1-3
w3$prethreeday <- rowSums(w3[, sprintf("day%02d", 1:3)], na.rm = TRUE)

# Month excluding placebo days Apr 1-3, i.e. days Apr 4-30
w3$rain_month <- rowSums(w3[, sprintf("day%02d", 4:30)], na.rm = TRUE)

# Week following assassination, days Apr 4-10
w3$rain_week <- rowSums(w3[, sprintf("day%02d", 4:10)], na.rm = TRUE)

# Month excluding days Apr 1-10, i.e. days Apr 11-30
w3$rain_month_placebo <- rowSums(w3[, sprintf("day%02d", 11:30)], na.rm = TRUE)

## ---- final_data_scrub, include = FALSE ----

# The lat/lon values are stored without their decimal point; scaling by .01
# restores it.
w3$lon <- w3$lon * .01
w3$lat <- w3$lat * .01

# Keep only the identifying, location, and rainfall-total columns.
w3small <- w3[c(
    "coopid", "station_name_x", "station_name_y", "yearmo",
    "st", "county", "lat", "lon",
    "rain_month", "rain_week", "prethreeday", "rain_month_placebo"
)]

# State lookup table.
icst <- read.csv(file = here("data/icpsr_states_fips_census.csv"), header = TRUE)

# Attach the state lookup to the rainfall data, matching the station's `st`
# value to the lookup's STATEAB column.
w4 <- merge(x = w3small, y = icst, by.x = "st", by.y = "STATEAB")

# Lower-case all column names of the merged table.
names(w4) <- tolower(names(w4))

# Quick look at the merged result.
dim(w4)
names(w4)
head(w4)
head(w4)

# Final cleanup:
#   state  : the lookup's `name` column, lower-cased
#   county : "<name> County|City|Parish, XX" reduced to "<name>", lower-cased
#   mpname : "state,county" key
#            (presumably meant to match `maps` polygon names -- TODO confirm)
# The suffix is stripped case-insensitively and BEFORE lower-casing. The
# previous order lower-cased first, so the capitalised "County" / "Parish"
# alternatives could never match and those suffixes were left in place.
w4 <- w4 %>%
    rename(state = name) %>%
    mutate(
        state  = state %>% as.character() %>% tolower(),
        county = county %>%
                   as.character() %>%
                   sub("^(.*) (County|City|Parish), ..$", "\\1", .,
                       ignore.case = TRUE) %>%
                   tolower(),
        mpname = paste(state, county, sep = ",")
    )


head(w4)
#w4

save(w4, file = here("data/rainfall_geocoded.Rdata"))


## ---- create_codebook, eval = FALSE, include = FALSE ----

# Write a codebook for the final rainfall table, overwriting any existing
# file. The `checks` entry sets the character and factor checks to NULL,
# which suppresses chr/fct output.
dataMaid::makeCodebook(
    w4,
    reportTitle = "Codebook for Rainfall Data, April 1968",
    file        = here("codebooks/codebook_rainfall.Rmd"),
    replace     = TRUE,
    checks      = list(character = NULL, factor = NULL)
)


